Last modified 10:57 AM on May 11, 2016. This document, R session image, version history, knitr cache, figures, and other associated datasets are located in /inside/grotto/blin/trna-markers/luad/predict/.
library(GenomicRanges)
library(RColorBrewer)
library(pheatmap)
library(plyr)
load('/inside/home/blin/grotto/trna-markers/process-reads/luad-counts.RData')
load('/inside/grotto/blin/data/hg19-srnas.RData')
excludeExtraneousGenes = function(counts, threshold = 0.96) {
cor = cor(t(counts))
cor[lower.tri(cor)] = NA # remove duplicates (x-y and y-x)
cor_genes = data.frame(cbind(which(!is.na(cor), arr.ind = TRUE), na.omit(as.vector(cor))))
cor_genes$row = rownames(cor)[cor_genes$row]
cor_genes$col = colnames(cor)[cor_genes$col]
cor_genes = cor_genes[cor_genes$V3 != 1 & cor_genes$V3 > threshold, ]
if (nrow(cor_genes) == 0) return(rownames(cor))
extraneous_genes = c()
ok_genes = c()
for (i in 1:nrow(cor_genes)) {
gene1 = cor_genes$row[i]
gene2 = cor_genes$col[i]
if (!(gene1 %in% extraneous_genes) & !(gene1 %in% ok_genes)) {
if (!(gene2 %in% extraneous_genes) & !(gene2 %in% ok_genes)) {
ok_genes = c(ok_genes, gene1)
extraneous_genes = c(extraneous_genes, gene2)
}
else {
extraneous_genes = c(extraneous_genes, gene1)
}
}
else {
extraneous_genes = c(extraneous_genes, gene2)
}
}
ok_genes
}
filterVariance = function(counts, var = 0.5) {
gene_var = sapply(data.frame(t(counts)), var)
names(gene_var) = rownames(counts)
counts[which(gene_var > quantile(gene_var, 0.5)), ]
}
Default metadata for all heatmaps
heatmap_metadata = luad_metadata[luad_metadata$barcode %in% colnames(luad_adjusted_counts), ]
# subtype
load('/inside/grotto/blin/trna-markers/luad/cluster-subtypes/clusters.RData')
clusters$subtype[!clusters$known] = "Unknown"
heatmap_metadata$subtype = as.factor(clusters$subtype[match(substr(heatmap_metadata$barcode, 1, 12), clusters$barcode)])
# stage
levels(heatmap_metadata$stage) = list(I = c("Stage I", "Stage IA", "Stage IB"), II = c("Stage II", "Stage IIA", "Stage IIB"), III = c("Stage III", "Stage IIIA", "Stage IIIB"), IV = "Stage IV", Unknown = '[Not Available]')
# n stage
levels(heatmap_metadata$n_stage) = list(N0 = "N0", N1 = "N1", N2 = "N2", N3 = "N3", Unknown = c("NX", "[Not Available]"))
# smoker
levels(heatmap_metadata$smoker) = list(Smoker = 1, Nonsmoker = 0, Unknown = NA)
# heatmap annotation
levels(heatmap_metadata$sample_type) = list(TP = "TP", NT = "NT")
annot_col = data.frame(stage = heatmap_metadata$stage, subtype = heatmap_metadata$subtype, n_stage = heatmap_metadata$n_stage, sample_type = heatmap_metadata$sample_type, smoker = heatmap_metadata$smoker)
rownames(annot_col) = colnames(luad_adjusted_counts)
annot_row = data.frame(sRNA = as.factor(srnas[match(rownames(luad_adjusted_counts), srnas$tx_name), ]$class))
rownames(annot_row) = rownames(luad_adjusted_counts)
annot_colors = list(stage = setNames(c(brewer.pal(4, "Set1"), "grey"), levels(heatmap_metadata$stage)),
subtype = setNames(c(brewer.pal(3, "Set2"), "grey"), levels(heatmap_metadata$subtype)),
n_stage = setNames(c(brewer.pal(4, "Set3"), "grey"), levels(heatmap_metadata$n_stage)),
sample_type = setNames(c("#EF8A62", "#67A9CF"), levels(heatmap_metadata$sample_type)),
smoker = setNames(c("#EF8A62", "#67A9CF", "grey"), levels(heatmap_metadata$smoker)),
sRNA = setNames(c(brewer.pal(9, "Paired"), "grey"), levels(annot_row$sRNA)))
df = luad_adjusted_counts df = df[rownames(df) %in% excludeExtraneousGenes(df), ] pheatmap(df, annotation_col = annot_col, annotation_row = annot_row, annotation_colors = annot_colors, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(10, "Reds"))
Fold change is relative to matched tumor-normal pairs or median normal expression.
getLog2FC = function(counts, metadata) {
counts = counts - min(counts) + 1
paired_tp_samples = which(metadata$sample_type == "TP" & metadata$paired)
log2fc = data.frame(numeric(nrow(counts)))
median_nt_expression = apply(counts[, paired_tp_samples+1], 1, median)
for (i in 1:ncol(counts)) {
sample_metadata = metadata[i, ]
if (sample_metadata$sample_type == "NT") next
tp_counts = counts[, i]
if (sample_metadata$paired) nt_counts = counts[, i+1]
else nt_counts = median_nt_expression
log2fc = cbind(log2fc, log2(tp_counts) - log2(nt_counts))
}
colnames(log2fc) = metadata[metadata$sample_type == "TP", ]$barcode
log2fc[, -1]
}
log2fc = getLog2FC(luad_adjusted_counts, luad_metadata)
metadata = heatmap_metadata[heatmap_metadata$barcode %in% colnames(log2fc), ] pheatmap(log2fc, annotation_col = annot_col, annotation_row = annot_row, annotation_colors = annot_colors, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(10, "RdBu"))
# filter out genes by correlation genelist = excludeExtraneousGenes(log2fc) table(srnas[match(genelist, srnas$tx_name), ]$class)
## ## genomic-tRF-3 genomic-tRF-5 snoRNA tRF-1 tRF-3 ## 116 120 21 23 52 ## tRF-5 tRF-i tRNA ## 4 23 194
df = log2fc[rownames(log2fc) %in% genelist, ] # filter out genes by fold change variance df = filterVariance(df, 0.5) # filter metadata metadata = heatmap_metadata[heatmap_metadata$barcode %in% colnames(df), ] pheatmap(df, annotation_col = annot_col, annotation_row = annot_row, annotation_colors = annot_colors, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(10, "RdBu"))
df = log2fc[srnas[match(rownames(log2fc), srnas$tx_name), ]$class == "miRNA", ] # filter for gene variance and extraneous genes df = df[rownames(df) %in% excludeExtraneousGenes(df), ] df = filterVariance(df, 0.9) metadata = heatmap_metadata[heatmap_metadata$barcode %in% colnames(df), ] # heatmap annotation annot_colors$sRNA = NA pheatmap(df, annotation_col = annot_col, annotation_colors = annot_colors, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = TRUE, trace = "none", color = brewer.pal(10, "RdBu"))
df = log2fc[srnas[match(rownames(log2fc), srnas$tx_name), ]$class %in% c("tRF-1", "tRF-3", "tRF-5", "tRF-i"), ]
# filter for gene variance and extraneous genes
genelist = excludeExtraneousGenes(df, 0.98)
df = df[rownames(df) %in% genelist, ]
df = filterVariance(df, 0.5)
metadata = heatmap_metadata[heatmap_metadata$barcode %in% colnames(df), ]
# heatmap annotation
annot_row = data.frame(tRF = as.factor(srnas[match(rownames(df), srnas$tx_name), ]$class))
rownames(annot_row) = rownames(df)
annot_colors$sRNA = NA
annot_colors$tRF = setNames(c(brewer.pal(4, "Dark2"), "grey"), levels(annot_row$tRF))
pheatmap(df, annotation_col = annot_col, annotation_row = annot_row, annotation_colors = annot_colors, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = TRUE, trace = "none", color = brewer.pal(10, "RdBu"))
pheatmap(df, annotation_col = annot_col, annotation_row = annot_row, annotation_colors = annot_colors, fontsize = 9, scale = "row", show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(10, "RdBu"))
df2 = df[, metadata[order(metadata$subtype), ]$barcode]
df2 = df2[, metadata[order(metadata$subtype), ]$subtype %in% c("PP", "PI", "TRU")]
annot_col2 = annot_col[colnames(df2), ]
pheatmap(df2, annotation_col = annot_col2, annotation_row = annot_row, annotation_colors = annot_colors, cluster_cols = FALSE, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(10, "RdBu"))
df2 = df[, metadata[order(metadata$stage), ]$barcode]
df2 = df2[, metadata[order(metadata$stage), ]$stage %in% c("I", "II", "III", "IV")]
annot_col2 = annot_col[colnames(df2), ]
pheatmap(df2, annotation_col = annot_col2, annotation_row = annot_row, annotation_colors = annot_colors, cluster_cols = FALSE, fontsize = 8, scale = "row", show_colnames = FALSE, show_rownames = FALSE, trace = "none", color = brewer.pal(10, "RdBu"))
## Saving search path.. ## Saving list of loaded packages.. ## Saving all data... ## Done.